"""
First function, read_libsvm_dataset, based on the function read_libsvm in 
the file dataset.cpp at https://github.com/stanford-futuredata/wmsketch/blob/master/src/dataset.cpp
"""
import numpy as np

# Read LIBSVM Dataset
# Returns two lists, X and y
# X is a list of dictionaries, one per example (key is the feature index, value is that feature's value as a float)
# y is a list containing the corresponding integer labels
def read_libsvm_dataset(file_path, binary=False):
    """Parse a LIBSVM-format file into sparse examples and labels.

    Each non-blank line is expected to look like
    ``<label> <index>:<value> <index>:<value> ...`` with tokens separated
    by whitespace.

    Args:
        file_path: Path of the file to read.
        binary: If True, open the file in ``'rb'`` mode and parse the raw
            bytes; otherwise open it in text mode (``'r'``).

    Returns:
        A tuple ``(X, y)``. ``X`` is a list with one dict per example,
        mapping feature index (int) to feature value (float). ``y`` is the
        list of the corresponding labels, converted with ``int``.

    Raises:
        ValueError: If a label or feature index is not an integer literal,
            or a feature value is not a float literal.
        IndexError: If a feature token contains no ``:`` separator.
    """
    # Both choices depend only on `binary`, so make them once instead of
    # re-deciding for every line and every token.
    if binary:
        mode, pair_delimiter = 'rb', b':'
    else:
        mode, pair_delimiter = 'r', ':'

    X = []
    y = []
    with open(file_path, mode) as libsvm_dataset:
        for line in libsvm_dataset:
            # split() with no separator splits on any run of whitespace and
            # drops the trailing newline, so repeated spaces, tabs and
            # trailing blanks no longer yield empty tokens that crash int().
            token_list = line.split()
            if not token_list:
                # Blank line (e.g. an empty line at end of file): no example.
                continue
            current_label = int(token_list[0])
            current_example = {}
            for token in token_list[1:]:
                idx_feature_pair = token.split(pair_delimiter)
                idx = int(idx_feature_pair[0])
                current_example[idx] = float(idx_feature_pair[1])
            X.append(current_example)
            y.append(current_label)
    return X, y